rm(list=ls())
packs = c('tidyverse', 'stringr', 'lmtest',
          'hrbrthemes', 'sjPlot', 'glmmTMB')
source("r/helpers.R")
loadPkg(packs)


# deal with fonts
extrafont::loadfonts()

# load all data
load('repFile/data/combined-modeling-data.rda')


## data for full models
modDat = 
  df %>%
  mutate_at(.vars = vars(CC8:LL28, CC4), funs(as.numeric)) %>% 
  # reverse police variable coding for interpretation
  mutate(LL26 = LL26*-1)



# Neighborhood rates figure -----------------------------------------------

# neighborhood-report
wave_dict = list('Bangalore' = 'Bangalore (2016)',
                    'Bangalore17' = 'Bangalore (2017)',
                    'Jaipur' = 'Jaipur (2016)',
                    'Patna' = 'Patna (2016)'
                    )

wave_labeler <- function(variable,value){
  return(wave_dict[value])
}

p1 =
  modDat %>%
  select(theft_combined, pol_lat_neigh, neigh_socVar, AA7, CITY) %>%
  group_by(AA7) %>%
  mutate(theft_combined = mean(theft_combined)) %>%
  distinct(AA7, .keep_all = T) %>%
  ggplot(data = ., aes(x = neigh_socVar, y = theft_combined)) +
  geom_point() +
  facet_grid(~CITY, scales = 'free', labeller = wave_labeler) +
  theme_ipsum_rc(grid = 'Y') +
  #ylim(c(0, 4)) +
  labs(x = 'Slum-level Social Density', y = 'Avg. number of items reported')

p2 =
  modDat %>%
  select(theft_combined, pol_lat_neigh, neigh_socVar, AA7, CITY) %>%
  group_by(AA7) %>%
  mutate(theft_combined = mean(theft_combined)) %>%
  distinct(AA7, .keep_all = T) %>%
  ggplot(data = ., aes(x = pol_lat_neigh, y = theft_combined)) +
  geom_point() +
  facet_grid(~CITY, scales = 'free', labeller = wave_labeler) +
  theme_ipsum_rc(grid = 'Y')  + 
  #ylim(c(0, 4)) +
  labs(x = 'Slum-level Political Density', y = 'Avg. number of items reported')

g = gridExtra::grid.arrange(p1, p2, ncol = 1)

ggsave('repFile/paper/figures/neighborhood-rates.pdf', plot = g,
       device = cairo_pdf, height = 5, width = 10)
ggsave('repFile/paper/figures/neighborhood-rates.tiff', plot = g,
       device = "tiff", height = 5, width = 10)


# density plots
df %>%
   ungroup %>%
   select(socVar, pol_lat, neigh_socVar, pol_lat_neigh) %>%
   gather() %>%
   mutate(neigh = ifelse(str_detect(key, 'socVar'), 1, 0)) %>%
   mutate(key = factor(key, levels = c('socVar', 'pol_lat',
                                       'neigh_socVar', 'pol_lat_neigh'))) %>%
   ggplot(data = ., aes(x = value, fill = key)) +
   geom_histogram(color = "white") +
   facet_grid(~key, scales = 'free') +
   theme_ipsum_rc(grid = 'Y') +
   scale_fill_ipsum(labels = c('Social\n Connectedness', 
                                'Political\n Connectedness',
                                'Social\n Density', 'Political\n Density')) +
   scale_color_brewer(palette = "Set2") +
   theme(strip.text = element_blank(),
         legend.title = element_blank(),
         legend.position = 'bottom') +
   labs(x = '', y = '', title = "Distribution of Connectedness and Density Variables")
 ggsave('repFile/paper/figures/iv-dist.pdf', device = cairo_pdf)
 ggsave('repFile/paper/figures/iv-dist.tiff', device = "tiff")
 


# Outcome: how many items would you report --------------------------------
rhs_soc = c('socVar', 'neigh_socVar', 'CC4', 'woman', 'SC', 'ST', 
          'CC50', 'frac_rel', 'frac_caste',
          'muslim', 'asset_sum', 
          'jaipur_dum', 'bangalore_dum', 'bangalore17_dum')

rhs_pol = c('pol_lat', 'pol_lat_neigh', 'CC4', 'woman', 'SC', 'ST', 
            'CC50', 'frac_rel', 'frac_caste',
            'muslim', 'asset_sum', 
            'jaipur_dum', 'bangalore_dum', 'bangalore17_dum')

labs = c('Intercept', 
         'Social Connectedness', 
         'Social Density', 
         'Political Connectedness',
         'Political Density',
         'Age', 'Gender = Man', 
         'Low Caste', 
         'Tribal Caste', 
         'Education', 
         'Religious Fractionalization',
         'Caste Fractionalization',
         'Muslim', 
         'Asset Index') %>% 
  str_to_sentence()


# models
m1 = 
  lm(formula(paste0('theft_combined ~', paste(rhs_soc, collapse = '+'), '-AA7')),
     data = modDat)

m2 = 
  lm(formula(paste0('theft_combined ~', paste(rhs_pol, collapse = '+'),'-AA7')),
     data = modDat)

# cluster errors
cls1 = coeftest(m1, vcovCluster(m1, m1$model$AA7))
cls2 = coeftest(m2, vcovCluster(m2, m2$model$AA7))



# output table
stats = list(c('N:', nrow(m1$model), nrow(m2$model)))

stargazer::stargazer(cls1,cls2, font.size = "scriptsize",
          type = 'latex', 
          title = 'Effect of individual connectedness and neighborhood density on willingness to report theft. Errors clustered at slum-level.', 
          label = 'table:how-many', 
          out = 'repFile/paper/figures/table-how-many.tex',
          covariate.labels = labs,
          column.labels = c('Number of items willing to report'),
          column.separate = c(2),
          dep.var.labels.include = F,
          #multicolumn = F,
          intercept.bottom = F,
          omit = 'dum', 
          no.space = F, 
          add.lines = stats)



# get predicted values
pDat = 
  rbind(cbind(get_model_data(m1, type = 'pred', terms = 'socVar'), 
              outcome = 'Social Connectedness'),
        cbind(get_model_data(m2, type = 'pred', terms = 'pol_lat_neigh'), 
          outcome = 'Political Density'))


# plot
ggplot(pDat, aes(x = x, y = predicted)) + geom_line(size = 2) + 
  facet_wrap(~outcome, scales = 'free_x') + 
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .4) + 
  theme_ipsum_rc(grid = 'Y') + 
  labs(x = 'Level of connectedness or density', 
       y = 'Number of items willing to report', 
       title = "Marginal Effect on Willingness to Report Theft")

ggsave(filename = 'repFile/paper/figures/mfx-howmany.pdf', 
       device = cairo_pdf)
ggsave(filename = 'repFile/paper/figures/mfx-howmany.tiff', 
       device = "tiff")


# Report Individual Items -------------------------------------------------

## items:
m1 = 
  glm(formula(paste0('report_moto ~', paste(rhs_soc, collapse = '+'),'-AA7')),
     data = subset(modDat, modDat$owns_moto == 1), 
     family = binomial(link = 'logit'))

m2 = 
  glm(formula(paste0('report_tv ~', paste(rhs_soc, collapse = '+'),'-AA7')),
      data = subset(modDat, modDat$owns_tv == 1), 
      family = binomial(link = 'logit'))

m3 = 
  glm(formula(paste0('report_stove ~', paste(rhs_soc, collapse = '+'),'-AA7')),
      data = subset(modDat, modDat$owns_stove == 1), 
      family = binomial(link = 'logit'))

m4 = 
  glm(formula(paste0('report_moto ~', paste(rhs_pol, collapse = '+'),'-AA7')),
      data = subset(modDat, modDat$owns_moto == 1), 
      family = binomial(link = 'logit'))

m5 = 
  glm(formula(paste0('report_tv ~', paste(rhs_pol, collapse = '+'),'-AA7')),
      data = subset(modDat, modDat$owns_tv == 1), 
      family = binomial(link = 'logit'))

m6 = 
  glm(formula(paste0('report_stove ~', paste(rhs_pol, collapse = '+'),'-AA7')),
      data = subset(modDat, modDat$owns_stove == 1), 
      family = binomial(link = 'logit'))

# clustered ses
cls1 = coeftest(m1, vcovCluster(m1, m1$model$AA7))
cls2 = coeftest(m2, vcovCluster(m2, m2$model$AA7))
cls3 = coeftest(m3, vcovCluster(m3, m3$model$AA7))
cls4 = coeftest(m4, vcovCluster(m4, m4$model$AA7))
cls5 = coeftest(m5, vcovCluster(m5, m5$model$AA7))
cls6 = coeftest(m6, vcovCluster(m6, m6$model$AA7))

# output table
stats = list(c('N:', 
               nrow(m1$model), 
               nrow(m2$model), 
               nrow(m3$model), 
               nrow(m4$model), 
               nrow(m5$model), 
               nrow(m6$model)))

stargazer(cls1,cls2, cls3, cls4, cls5, cls6, 
          type = 'latex', 
          title = 'Effect of individual connectedness and neighborhood density on willingness to report theft of individual items. Errors clustered at slum-level.', 
          label = 'table:indie-report', 
          out = 'repFile/paper/figures/table-indie-report.tex',
          covariate.labels = labs,
          column.labels = c(rep(c('Moto', 'TV', 'Stove'), 2)),
          dep.var.labels.include = F,
          multicolumn = F,
          intercept.bottom = F,
          omit = 'dum', 
          no.space = F, 
          add.lines = stats)




# Police Satisfaction -----------------------------------------------------


# models
m1 = 
  lm(formula(paste0('LL26 ~', paste(rhs_soc, collapse = '+'), '-AA7')),
      data = modDat)

m2 = 
  lm(formula(paste0('LL26 ~', paste(rhs_pol, collapse = '+'),'-AA7')),
      data = modDat)

# cluster errors
cls1 = coeftest(m1, vcovCluster(m1, m1$model$AA7))
cls2 = coeftest(m2, vcovCluster(m2, m2$model$AA7))


# output table
stats = list(c('N:', 
               nrow(m1$model), 
               nrow(m2$model)))

stargazer::stargazer(cls1,cls2, font.size = "scriptsize",
          type = 'latex', 
          title = 'Effect of individual connectedness and neighborhood density on satisfaction with local policing. Errors clustered at slum-level.', 
          label = 'table:police-satisfy', 
          out = 'repFile/paper/figures/table-police-satisfy.tex',
          covariate.labels = labs,
          column.labels = c('Satisfaction with local policing'),
          column.separate = c(2),
          dep.var.labels.include = F,
          #multicolumn = F,
          intercept.bottom = F,
          omit = 'dum', 
          no.space = F, 
          add.lines = stats)


# mfx
# get predicted values
pDat = 
  rbind(cbind(get_model_data(m1, type = 'pred', terms = 'socVar'), 
              outcome = 'Social Connectedness'),
        cbind(get_model_data(m1, type = 'pred', terms = 'neigh_socVar'), 
              outcome = 'Social Density'),
        cbind(get_model_data(m2, type = 'pred', terms = 'pol_lat_neigh'), 
              outcome = 'Political Density'))


# plot
ggplot(pDat, aes(x = x, y = predicted)) + geom_line(size = 2) + 
  facet_wrap(~outcome, scales = 'free_x') + 
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .4) + 
  theme_ipsum_rc(grid = 'Y') + 
  labs(x = 'Level of connectedness or density', 
       y = 'Level of satisfaction with police (from -4 to 0)', 
       title = "Marginal Effect on Satisfaction with Local Policing")
ggsave(filename = 'repFile/paper/figures/mfx-police-satisfy.pdf', device = cairo_pdf)
ggsave(filename = 'repFile/paper/figures/mfx-police-satisfy.tiff', device = "tiff")



# Local Conflict (individual) ---------------------------------------------

# models
m1 = 
  lm(formula(paste0('LL32 ~', paste(rhs_soc, collapse = '+'), '-AA7')),
      data = modDat)

m2 = 
  lm(formula(paste0('LL32 ~', paste(rhs_pol, collapse = '+'),'-AA7')),
      data = modDat)

# cluster errors
cls1 = coeftest(m1, vcovCluster(m1, m1$model$AA7))
cls2 = coeftest(m2, vcovCluster(m2, m2$model$AA7))


# output table
stats = list(c('N:', 
               nrow(m1$model), 
               nrow(m2$model)))

stargazer(cls1,cls2, 
          type = 'latex', 
          title = 'Effect of individual connectedness and neighborhood density on incidence of local conflict. Errors clustered at slum-level.', 
          label = 'table:indie-conflict', 
          out = 'repFile/paper/figures/table-indie-conflict.tex',
          covariate.labels = labs,
          column.labels = c('Frequency of local conflicts'),
          column.separate = c(2),
          dep.var.labels.include = F,
          #multicolumn = F,
          intercept.bottom = F,
          omit = 'dum', 
          no.space = F, 
          add.lines = stats) # DONE




# Local Conflict (slum) ---------------------------------------------------

# calculate slum level conflict experience
modDat_slum = 
  modDat %>% 
  group_by(AA7) %>% 
  summarise_at(vars('CC4', 'woman', 'SC', 'ST',
                    'CC50', 'muslim', 'asset_sum', 'LL32'), 
               funs(mean(., na.rm = T)))

# merge back in slum level variables
slum_vars = 
  modDat %>% 
  select(AA7, neigh_socVar, pol_lat_neigh, 
         frac_rel, frac_caste,
         jaipur_dum, bangalore_dum, bangalore17_dum) %>% 
  distinct(AA7, .keep_all = T)

modDat_slum = left_join(modDat_slum, slum_vars, by = 'AA7')

# slum level model
slum_soc = c('neigh_socVar', 'CC4', 'woman', 'SC', 'ST', 
          'CC50', 'frac_rel', 'frac_caste',
          'muslim', 'asset_sum', 'jaipur_dum', 'bangalore_dum', 'bangalore17_dum')

slum_pol = c('pol_lat_neigh', 'CC4', 'woman', 'SC', 'ST', 
             'CC50', 'frac_rel','frac_caste',
             'muslim', 'asset_sum', 'jaipur_dum', 'bangalore_dum', 'bangalore17_dum')


# models
m1 = 
  lm(formula(paste0('LL32 ~', paste(slum_soc, collapse = '+'), '-AA7')),
      data = modDat_slum)

m2 = 
  lm(formula(paste0('LL32 ~', paste(slum_pol, collapse = '+'),'-AA7')),
      data = modDat_slum)


# fix labels
labs_conf = labs[!str_detect(labs, 'connectedness')]

# extract stats
stats = list(c('N:', 
               nrow(m1$model), 
               nrow(m2$model)))

# output table
stargazer(m1,m2, 
          type = 'latex', 
          title = 'Effect of neighborhood density on incidence of local conflict.', 
          label = 'table:slum-conflict', 
          out = 'repFile/paper/figures/table-slum-conflict.tex',
          covariate.labels = labs_conf,
          column.labels = c('Average frequency of conflict in slum'),
          column.separate = c(2),
          dep.var.labels.include = F,
          #multicolumn = F,
          intercept.bottom = F,
          omit = 'dum', 
          keep.stat = c('n', 'adj.rsq'))



# Leader Help (major theft) -----------------------------------------------


# models
m1 = 
  glm(formula(paste0('majorTheft ~', paste(rhs_soc, collapse = '+'), '-AA7')),
      data = modDat, family = binomial(link = 'logit'))

m2 = 
  glm(formula(paste0('majorTheft ~', paste(rhs_pol, collapse = '+'),'-AA7')),
      data = modDat, family = binomial(link = 'logit'))

# cluster errors
cls1 = coeftest(m1, vcovCluster(m1, m1$model$AA7))
cls2 = coeftest(m2, vcovCluster(m2, m2$model$AA7))


# output table

# extract stats
stats = list(c('N:', 
               nrow(m1$model), 
               nrow(m2$model)))

stargazer::stargazer(cls1,cls2, font.size = "scriptsize",
          type = 'latex', 
          title = 'Effect of individual connectedness and neighborhood density on probability of asking leader for help with a major theft. Errors clustered at slum-level.', 
          label = 'table:major-theft', 
          out = 'repFile/paper/figures/table-major-theft.tex',
          covariate.labels = labs,
          column.labels = c('Ask Leader for Help with Major Theft'),
          column.separate = c(2),
          dep.var.labels.include = F,
          #multicolumn = F,
          intercept.bottom = F,
          omit = 'dum', 
          no.space = F, 
          add.lines = stats)



# mfx
# get predicted values
pDat = 
  rbind(cbind(get_model_data(m1, type = 'pred', terms = 'socVar'), 
              outcome = 'Social Connectedness'),
        cbind(get_model_data(m1, type = 'pred', terms = 'neigh_socVar'), 
              outcome = 'Social Density'),
        cbind(get_model_data(m2, type = 'pred', terms = 'pol_lat'), 
              outcome = 'Political Connectedness'),
        cbind(get_model_data(m2, type = 'pred', terms = 'pol_lat_neigh'), 
              outcome = 'Political Density'))


# plot
ggplot(pDat, aes(x = x, y = predicted)) + geom_line(size = 2) + 
  facet_wrap(~outcome, scales = 'free_x') + 
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .4) + 
  theme_ipsum_rc(grid = 'Y') + 
  scale_y_percent() + 
  labs(x = 'Level of connectedness or density', 
       y = 'Probability ask leader help with theft', 
       title = "Marginal Effect on Likelihood of Asking Leader for Help with Major Theft")

ggsave(filename = 'repFile/paper/figures/mfx-major-theft.pdf',
       device = cairo_pdf)
ggsave(filename = 'repFile/paper/figures/mfx-major-theft.tiff',
       device = "tiff")



# Bribe prices ------------------------------------------------------------


# get data
rhs_soc = c('socVar', 'neigh_socVar', 'CC4', 'woman', 'SC', 'ST', 
            'CC50', 'frac_rel', 'frac_caste',
            'muslim', 'asset_sum')

rhs_pol = c('pol_lat', 'pol_lat_neigh', 'CC4', 'woman', 'SC', 'ST', 
            'CC50', 'frac_rel', 'frac_caste',
            'muslim', 'asset_sum')

# subset
modDat_moto = 
  modDat %>% 
  filter(moto_bribe <= 5000) %>% 
  mutate(lmoto_bribe = log(moto_bribe + 1))

# plot variation
modDat_moto %>% 
  ggplot(aes(x = AA7, y = moto_bribe, group = AA7)) + 
  geom_boxplot() + 
  theme_ipsum_rc() + 
  coord_flip() + 
  theme(legend.position = 'none') + 
  labs(y = 'Expected bribe (in rupees)', 
       x = 'Slum identifiers')
ggsave('repFile/paper/figures/bribe-dist.pdf', device = cairo_pdf)  
ggsave('repFile/paper/figures/bribe-dist.tiff', device = "tiff")  



# zinf models
m1 =
  pscl::zeroinfl(formula(paste0('moto_bribe ~', paste(rhs_soc, collapse = '+'), '-AA7')),
      data = modDat_moto, dist = 'negbin')

cls1 = clx2(m1, 1, m1$model$AA7)


m2 =
  pscl::zeroinfl(formula(paste0('moto_bribe ~', paste(rhs_pol, collapse = '+'), '-AA7')),
                 data = modDat_moto, dist = 'negbin')

cls2 = clx2(m2, 1, m2$model$AA7)


# table output
stats = list(c('N:', 
               nrow(m1$model), 
               nrow(m2$model)))


labs = c("Intercept", "Social connectedness", "Social density", 
         "Political connectedness", "Political density")

stargazer(cls1,cls2, font.size = "scriptsize",
          type = 'latex', 
          title = 'Effect of individual connectedness and neighborhood density on amount paid in bribe. Zero-inflated negative binomial models. Errors clustered at slum-level. Control variables not shown.', 
          label = 'table:zinf-bribe', 
          out = 'repFile/paper/figures/table-zinf-bribe.tex',
          covariate.labels = c(paste0('Count: ', labs), 
                               paste0('Zero-Inflation: ', labs)),
          column.labels = c('Bribe paid to investigate theft of moto, rupees'),
          column.separate = c(2),
          dep.var.labels.include = F,
          #multicolumn = F,
          intercept.bottom = F,
          keep = c("Intercept", "pol_lat", "socVar"),
          no.space = F, 
          add.lines = stats)


# zinf models using TMB for mfx
m1 = glmmTMB(formula(paste0('moto_bribe ~', 
                                     paste(rhs_soc, collapse = '+'), '+ (1|AA7)')), 
             data = modDat_moto, ziformula = ~., family = nbinom2)

# m2 = glmmTMB(formula(paste0('moto_bribe ~', paste(rhs_pol, collapse = '+'), '+ (1|AA7)')), 
#              data = modDat_moto, ziformula = ~., family = nbinom2)



# mfx plot
pDat = get_model_data(m1, terms = 'socVar', type = 'pred')

ggplot(pDat, aes(x = x, y = predicted)) + geom_line(size = 2) + 
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .4) + 
  theme_ipsum_rc(grid = 'Y') +
  labs(y = 'Bribe amount (in rupees)', x = 'Level of social connectedness', 
       title = "Marginal Effect on Size of Bribe") + 
  ylim(c(0, 2500))
ggsave(filename = 'repFile/paper/figures/mfx-bribe-moto.pdf', device = cairo_pdf)
ggsave(filename = 'repFile/paper/figures/mfx-bribe-moto.tiff', device = "tiff")
